Data Science - Karseprojekt

Todo

  • link til github
  • billede af opstilling
  • Højeste karse i hvert forsøg
  • arduino kode
  • billeder i github
  • En masse lækker tekst

Projektdefinering

Dette projekt er lavet i forbindelse med kurset Data Science og omhandler dataopsamling, eksplorativ dataanalyse og datakommunikation.

Basale informationer

Her findes informationer om gruppen, der har lavet dette projekt.

#Gruppemedlemmer: Regitze og Silje
#Studieretning: Biologi
#Alias: Regilje

Projekt

Opsætning af data

Libraries

library(tidyverse)
library(lubridate)
library(plotly)

Indlæs data

# Read the tab-delimited file Data/KARSELOG.TXT into a tibble; column types
# are left for readr to guess.
d <- readr::read_delim(file = "Data/KARSELOG.TXT", delim = "\t")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 67069 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): DatoTid
## dbl (4): Temperatur, Luftfugtighed, Afstand, Lys
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Rå data

head(d) ## Shows the first 6 rows of the raw data
## # A tibble: 6 × 5
##   DatoTid             Temperatur Luftfugtighed Afstand   Lys
##   <chr>                    <dbl>         <dbl>   <dbl> <dbl>
## 1 2000-00-00T00:00:00         22            45     117    49
## 2 2000-00-00T00:00:00         22            45     117    27
## 3 2000-00-00T00:00:00         22            45     116    28
## 4 2000-00-00T00:00:00         22            45     117    28
## 5 2000-00-00T00:00:00         22            45     116    28
## 6 2000-00-00T00:00:00         22            45     120    28
tail(d) ## Shows the last 6 rows of the raw data
## # A tibble: 6 × 5
##   DatoTid             Temperatur Luftfugtighed Afstand   Lys
##   <chr>                    <dbl>         <dbl>   <dbl> <dbl>
## 1 2024-03-05T13:48:58         21            40      82    29
## 2 2024-03-05T13:49:06         21            40      82    29
## 3 2024-03-05T13:49:14         21            40      82    29
## 4 2024-03-05T13:49:22         21            40      82    30
## 5 2024-03-05T13:49:31         21            40      83    30
## 6 2024-03-05T13:49:39         21            40      83    30
glimpse(d) ## Lists the columns with their types and reports the number of rows
## Rows: 67,069
## Columns: 5
## $ DatoTid       <chr> "2000-00-00T00:00:00", "2000-00-00T00:00:00", "2000-00-0…
## $ Temperatur    <dbl> 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, NA, 23, …
## $ Luftfugtighed <dbl> 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, NA, 45, …
## $ Afstand       <dbl> 117, 117, 116, 117, 116, 120, 118, 117, 118, 117, 118, 1…
## $ Lys           <dbl> 49, 27, 28, 28, 28, 28, 27, 27, 28, 28, 28, 27, NA, 50, …

Rens data

# Clean the raw data in one pass:
#   1. drop rows whose DatoTid is the literal text "Dato"
#      (presumably stray header lines in the log -- TODO confirm);
#   2. parse DatoTid from ISO-like text into POSIXct, interpreted as UTC;
#   3. drop the first 14 remaining rows by position.
# NOTE(review): the 14-row cut looks like it targets the leading rows stamped
# "2000-00-00T00:00:00", which cannot be parsed as a date -- confirm that
# exactly 14 such rows exist and that none occur later in the file.
dc <- d %>%
  filter(DatoTid != "Dato") %>%
  mutate(
    DatoTid = as.POSIXct(DatoTid, format = "%Y-%m-%dT%H:%M:%OS", tz = "UTC")
  ) %>%
  slice(-(1:14))

head(dc) ## Shows the first 6 rows of the cleaned data
## # A tibble: 6 × 5
##   DatoTid             Temperatur Luftfugtighed Afstand   Lys
##   <dttm>                   <dbl>         <dbl>   <dbl> <dbl>
## 1 2024-02-27 21:27:34         23            45     114    27
## 2 2024-02-27 21:28:59         23            44     117    48
## 3 2024-02-27 21:29:21         23            45     115    53
## 4 2024-02-27 21:29:29         23            45     117    31
## 5 2024-02-27 21:29:37         23            44     117    31
## 6 2024-02-27 21:29:45         23            44     117    30
tail(dc) ## Shows the last 6 rows of the cleaned data
## # A tibble: 6 × 5
##   DatoTid             Temperatur Luftfugtighed Afstand   Lys
##   <dttm>                   <dbl>         <dbl>   <dbl> <dbl>
## 1 2024-03-05 13:48:58         21            40      82    29
## 2 2024-03-05 13:49:06         21            40      82    29
## 3 2024-03-05 13:49:14         21            40      82    29
## 4 2024-03-05 13:49:22         21            40      82    30
## 5 2024-03-05 13:49:31         21            40      83    30
## 6 2024-03-05 13:49:39         21            40      83    30
glimpse(dc) ## Lists the columns with their types and reports the number of rows
## Rows: 67,048
## Columns: 5
## $ DatoTid       <dttm> 2024-02-27 21:27:34, 2024-02-27 21:28:59, 2024-02-27 21…
## $ Temperatur    <dbl> 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, …
## $ Luftfugtighed <dbl> 45, 44, 45, 45, 44, 44, 44, 44, 44, 43, 43, 43, 43, 43, …
## $ Afstand       <dbl> 114, 117, 115, 117, 117, 117, 115, 118, 117, 118, 115, 1…
## $ Lys           <dbl> 27, 48, 53, 31, 31, 30, 28, 26, 26, 26, 29, 30, 26, 26, …

Filter og formater data

# Scatter plot of every cleaned distance reading against its timestamp,
# used to eyeball the raw signal before filtering.
dc %>%
  ggplot(aes(DatoTid, Afstand)) +
  geom_point()

# Keep one reading out of every `sample_every` consecutive rows.
sample_every <- 180L

# Build the thinned data set `dcf` from the cleaned data `dc`:
#   1. keep readings with Afstand below 135 (rows with missing Afstand are
#      dropped by filter() as well);
#   2. number the remaining rows 1..sample_every cyclically and keep the first
#      row of each cycle;
#   3. keep rows whose timestamp lies inside the measurement window;
#   4. express growth (Vækst) as the largest remaining Afstand minus the
#      current Afstand, then drop the helper columns.
# The window bounds are created with tz = "UTC" because DatoTid is parsed as
# UTC. Without an explicit tz, as.POSIXct() uses the session time zone, which
# shifts the window and silently cuts off readings at the end of the series.
dcf <- dc %>%
  filter(Afstand < 135) %>%
  mutate(indeks = (row_number() - 1L) %% sample_every + 1L) %>%
  filter(indeks == 1L) %>%
  filter(between(
    x = DatoTid,
    left = as.POSIXct("2024-02-27 21:27:34", tz = "UTC"),
    right = as.POSIXct("2024-03-05 13:49:39", tz = "UTC")
  )) %>%
  mutate(Vækst = max(Afstand) - Afstand) %>%
  select(-Afstand, -indeks)
# Reshape `dcf` to long format: one row per timestamp and measured variable,
# with the variable name in `variable` and its reading in `value`.
# The columns are listed explicitly so the output keeps this variable order.
dcfl <- dcf %>%
  pivot_longer(
    cols = all_of(c("Vækst", "Lys", "Luftfugtighed", "Temperatur")),
    names_to = "variable",
    values_to = "value"
  )
head(dcfl) ## Shows the first 6 rows of the long-format data
## # A tibble: 6 × 3
##   DatoTid             variable      value
##   <dttm>              <chr>         <dbl>
## 1 2024-02-27 21:27:34 Vækst            14
## 2 2024-02-27 21:27:34 Lys              27
## 3 2024-02-27 21:27:34 Luftfugtighed    45
## 4 2024-02-27 21:27:34 Temperatur       23
## 5 2024-02-27 22:05:19 Vækst             2
## 6 2024-02-27 22:05:19 Lys              31
# Line chart of every variable over time, one stacked panel per variable,
# each panel with its own y-axis scale.
test <- ggplot(
  data = dcfl,
  mapping = aes(x = DatoTid, y = value, colour = variable)
) +
  facet_wrap(vars(variable), ncol = 1, scales = "free_y") +
  geom_line()

# Render the static ggplot as an interactive plotly widget.
ggplotly(p = test)
# Take the first 100 thinned readings. head() returns only the rows that
# exist, whereas positional indexing past the end of a tibble would add
# NA-filled rows.
dage <- head(dcf, n = 100)

# Line chart of growth over time for that subset.
# NOTE(review): this overwrites the earlier `test` plot object and the result
# is only assigned, never displayed -- confirm whether it should be printed.
test <- ggplot(dage, aes(x = DatoTid, y = Vækst)) +
  geom_line()

Arduino kode

GitHub

Information om versioner

sessionInfo() ## Prints the R version, platform, locale and package versions used for this run
## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Danish_Denmark.utf8  LC_CTYPE=Danish_Denmark.utf8   
## [3] LC_MONETARY=Danish_Denmark.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=Danish_Denmark.utf8    
## 
## time zone: Europe/Copenhagen
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plotly_4.10.3   lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1  
##  [5] dplyr_1.1.4     purrr_1.0.2     readr_2.1.4     tidyr_1.3.0    
##  [9] tibble_3.2.1    ggplot2_3.4.4   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.7        utf8_1.2.4        generics_0.1.3    stringi_1.8.2    
##  [5] hms_1.1.3         digest_0.6.33     magrittr_2.0.3    evaluate_0.23    
##  [9] grid_4.3.2        timechange_0.2.0  bookdown_0.37     fastmap_1.1.1    
## [13] jsonlite_1.8.7    httr_1.4.7        fansi_1.0.5       crosstalk_1.2.1  
## [17] viridisLite_0.4.2 scales_1.3.0      lazyeval_0.2.2    jquerylib_0.1.4  
## [21] cli_3.6.1         crayon_1.5.2      rlang_1.1.2       ellipsis_0.3.2   
## [25] bit64_4.0.5       munsell_0.5.0     withr_2.5.2       cachem_1.0.8     
## [29] yaml_2.3.7        parallel_4.3.2    tools_4.3.2       tzdb_0.4.0       
## [33] colorspace_2.1-0  vctrs_0.6.5       R6_2.5.1          lifecycle_1.0.4  
## [37] bit_4.0.5         htmlwidgets_1.6.3 vroom_1.6.4       pkgconfig_2.0.3  
## [41] pillar_1.9.0      bslib_0.6.1       gtable_0.3.4      glue_1.6.2       
## [45] data.table_1.14.8 rmdformats_1.0.4  highr_0.10        xfun_0.41        
## [49] tidyselect_1.2.0  rstudioapi_0.15.0 knitr_1.45        farver_2.1.1     
## [53] htmltools_0.5.7   labeling_0.4.3    rmarkdown_2.25    compiler_4.3.2